Importing the Dataset & Packages

library(tidyverse)
library(maps) # The maps package has geographic information on all U.S states
library(ggmap)
library(tmap)
library(rgdal)
library(rgeos)
library(geojsonio)
library(viridis)
library(hrbrthemes)
library(tm)
library(sp)
library(DT)
library(devtools)
library(leaflet)
library(ggthemes)
library(RColorBrewer)
# Load the raw listings export. The path is relative to the working directory
# (inspect it with getwd() if the file cannot be found).
data <- read.csv("data/airbnb_listings/airbnb_listings.csv")

# Keep only the columns used in the analysis below.
airbnbnyc <- select(
  data,
  id, host_id, host_listings_count,
  latitude, longitude,
  room_type, accommodates, bathrooms, bedrooms, price,
  neighbourhood_cleansed, neighbourhood_group_cleansed,
  availability_365, number_of_reviews, review_scores_rating,
  transit
)

# Preview the first rows of the trimmed data set.
head(airbnbnyc)
##     id host_id host_listings_count latitude longitude       room_type
## 1 2539    2787                   6 40.64749 -73.97237    Private room
## 2 2595    2845                   5 40.75362 -73.98377 Entire home/apt
## 3 3647    4632                   1 40.80902 -73.94190    Private room
## 4 3831    4869                   1 40.68514 -73.95976 Entire home/apt
## 5 4989    7118                   1 40.76260 -73.99304 Entire home/apt
## 6 5022    7192                   1 40.79851 -73.94399 Entire home/apt
##   accommodates bathrooms bedrooms   price neighbourhood_cleansed
## 1            2         1        1 $149.00             Kensington
## 2            2         1        0 $225.00                Midtown
## 3            2         1        1 $150.00                 Harlem
## 4            3         1        1  $89.00           Clinton Hill
## 5            2         1        1 $105.00         Hell's Kitchen
## 6            1         1       NA  $80.00            East Harlem
##   neighbourhood_group_cleansed availability_365 number_of_reviews
## 1                     Brooklyn              365                 9
## 2                    Manhattan              331                44
## 3                    Manhattan              365                 0
## 4                     Brooklyn              182               258
## 5                    Manhattan               83                27
## 6                    Manhattan                0                 9
##   review_scores_rating
## 1                   98
## 2                   95
## 3                   NA
## 4                   90
## 5                   89
## 6                   93
##                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                    transit
## 1                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                     Very close to F and G trains and Express bus into NY.  The B and Q are closeby also.
## 2                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                   Apartment is located on 37th Street between 5th & 6th Avenue, just a few blocks from all subway connections. Closest Subways (in order of proximity to apartment (Website hidden by Airbnb) W: 34th Street & 6th Avenu (Website hidden by Airbnb) 3: 34th Street & 7th Avenue 7: 42nd & 5th Avenu (Website hidden by Airbnb) S: 42nd Street between Park & Lexington Avenue (Website hidden by Airbnb) E: 34th Street and 8th Avenue If coming by car, there is a parking garage on the block and free street parking.
## 3                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                         
## 4 B52 bus for a 10-minute ride to downtown Brooklyn is a few yards away on the corner; G train/Classon Avenue is 5 blocks away; C train is about 6 blocks to either the Clinton/Washington stop or Franklin Avenue stop.  There is on-street parking, alternate side is twice per week on the immediate block but only once per week on Classon.  From LaGuardia Airport, a taxi will cost $30-$35, but there is also a bus that will put you at the Jackson Heights subway station, and from there it's about 5 stops to catch the G train, which stops 5 blocks away.  From JFK, the taxi is closer to $40, but the AirTran can get you conveniently to the A/C line and the C train is about 6 blocks from here.   From JFK via subway/metro/train: From JFK take the AirTrain to Howard Beach to catch the A train toward Brooklyn/Manhattan.  Take the A train to Utica Avenue and go across that same platform to catch the C local train (you could also transfer at Nostrand but you would have to carry luggage downstairs to cat
## 5                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                                       NYC subways and the Hudson River are each just a 10 min walk away.
## 6

1. Overall Location

a) Provide a map to show where in New York City AirBnB listings are located.

Read in the neighbourhoods.geojson file of NYC neighborhoods and the nybb shapefile of NYC boroughs.

# Load the neighbourhood polygons from the GeoJSON file
nyc_neighborhoods <- readOGR(
  dsn = "data/neighbourhoods.geojson"
)
## OGR data source with driver: GeoJSON 
## Source: "C:\Users\hs324\OneDrive\Desktop\Class_Files\04_2022Spring_CU\GR5063_DataViz\Assignment\hw02_AirbnbNYC\assignment-2-airbnb-HuanSunGo\data\neighbourhoods.geojson", layer: "neighbourhoods"
## with 233 features
## It has 2 fields
# Reproject the neighbourhood polygons to WGS84 longitude/latitude, then
# flatten them into a data frame so they can be drawn on a map
nyc_neighborhoods <- fortify(
  spTransform(nyc_neighborhoods, CRS("+proj=longlat +datum=WGS84"))
)
## Regions defined for each Polygons
# Load the "nybb" borough layer from the shapefile directory
nyc_boroughs <- readOGR(dsn = "data/nyc_boroughs_map/.", layer = "nybb")
## OGR data source with driver: ESRI Shapefile 
## Source: "C:\Users\hs324\OneDrive\Desktop\Class_Files\04_2022Spring_CU\GR5063_DataViz\Assignment\hw02_AirbnbNYC\assignment-2-airbnb-HuanSunGo\data\nyc_boroughs_map", layer: "nybb"
## with 5 features
## It has 4 fields
# Reproject the borough polygons to WGS84 longitude/latitude, then flatten
# them into a data frame for plotting
nyc_boroughs <- fortify(
  spTransform(nyc_boroughs, CRS("+proj=longlat +datum=WGS84"))
)
## Regions defined for each Polygons
# Download the city-wide background tiles (Stamen "toner-lite", zoom 11)
map_nyc <- get_map(
  location = "New York City",
  zoom = 11,
  maptype = "toner-lite",
  source = "stamen"
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=New%20York%20City&zoom=11&size=640x640&scale=2&maptype=terrain&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=New+York+City&key=xxx
## Source : http://tile.stamen.com/toner-lite/11/601/768.png
## Source : http://tile.stamen.com/toner-lite/11/602/768.png
## Source : http://tile.stamen.com/toner-lite/11/603/768.png
## Source : http://tile.stamen.com/toner-lite/11/604/768.png
## Source : http://tile.stamen.com/toner-lite/11/601/769.png
## Source : http://tile.stamen.com/toner-lite/11/602/769.png
## Source : http://tile.stamen.com/toner-lite/11/603/769.png
## Source : http://tile.stamen.com/toner-lite/11/604/769.png
## Source : http://tile.stamen.com/toner-lite/11/601/770.png
## Source : http://tile.stamen.com/toner-lite/11/602/770.png
## Source : http://tile.stamen.com/toner-lite/11/603/770.png
## Source : http://tile.stamen.com/toner-lite/11/604/770.png
## Source : http://tile.stamen.com/toner-lite/11/601/771.png
## Source : http://tile.stamen.com/toner-lite/11/602/771.png
## Source : http://tile.stamen.com/toner-lite/11/603/771.png
## Source : http://tile.stamen.com/toner-lite/11/604/771.png
# Draw the borough polygons on top of the background tiles and overlay one
# small translucent point per listing. ggmap() returns a ggplot object.
g <- ggmap(
  map_nyc,
  base_layer = ggplot(data = nyc_boroughs, mapping = aes(x = long, y = lat)),
  extent = "normal",
  maprange = FALSE
) +
  # Transparency is a fixed setting, so it is passed as a constant outside
  # aes(); only the fill varies, by polygon id.
  geom_polygon(
    aes(x = long, y = lat, group = group, fill = id),
    size = 0.5, color = "#636363", alpha = 0.3
  ) +
  geom_point(
    data = airbnbnyc,
    aes(x = longitude, y = latitude),
    size = 0.3, alpha = 0.1, color = "#2c7fb8"
  ) +
  theme_map() +
  # The fill legend only lists polygon ids, so hide it.
  theme(legend.position = "none")
g

According to the map, the high-density areas are mostly in Manhattan and Brooklyn; we'll take a closer look at the map using a density estimate in the question below.

b) Provide a map in which you summarize the density of the AirBnB listings and highlight the hot-spots for AirBnB locations. Make sure to annotate a few hot-spots on the map.

# Re-download the background tiles one zoom level closer (zoom 12) to focus
# on the central part of the city
map_nyc <- get_map(
  location = "New York City",
  zoom = 12,
  maptype = "toner-lite",
  source = "stamen"
)
## Source : https://maps.googleapis.com/maps/api/staticmap?center=New%20York%20City&zoom=12&size=640x640&scale=2&maptype=terrain&key=xxx
## Source : https://maps.googleapis.com/maps/api/geocode/json?address=New+York+City&key=xxx
## Source : http://tile.stamen.com/toner-lite/12/1204/1538.png
## Source : http://tile.stamen.com/toner-lite/12/1205/1538.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1538.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1538.png
## Source : http://tile.stamen.com/toner-lite/12/1204/1539.png
## Source : http://tile.stamen.com/toner-lite/12/1205/1539.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1539.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1539.png
## Source : http://tile.stamen.com/toner-lite/12/1204/1540.png
## Source : http://tile.stamen.com/toner-lite/12/1205/1540.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1540.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1540.png
## Source : http://tile.stamen.com/toner-lite/12/1204/1541.png
## Source : http://tile.stamen.com/toner-lite/12/1205/1541.png
## Source : http://tile.stamen.com/toner-lite/12/1206/1541.png
## Source : http://tile.stamen.com/toner-lite/12/1207/1541.png
# Shade the map by a 2-D density estimate of the listing coordinates and
# label three of the hot-spots
density <- ggmap(map_nyc) +
  stat_density2d(
    mapping = aes(x = longitude, y = latitude, fill = ..level.., alpha = ..level..),
    data = airbnbnyc,
    geom = "polygon"
  ) +
  scale_fill_distiller(palette = 4, direction = 1) +
  theme(legend.position = "bottom") +
  theme_map() +
  # ggplot2:: prefix picks the ggplot2 annotate() explicitly
  ggplot2::annotate(
    "text",
    x = c(-73.994508, -73.98736, -73.951996),
    y = c(40.763186, 40.72527, 40.71200),
    label = c("Hells Kitchen", "East Village", "Williamsburg"),
    color = "Dark Blue", fontface = 4, size = 3
  ) +
  theme(legend.position = "none")


density
## Warning: Removed 10503 rows containing non-finite values (stat_density2d).

It is not surprising to find that the area southwest of Central Park and the East Village have the most homes rented out on Airbnb, but it is interesting to find that Williamsburg, an up-and-coming popular neighborhood, has also attracted a lot of listings.

2. Renting out your apartment vs. permanent rentals.

An Airbnb host can set up a calendar for their listing so that it is only available for a few days or weeks a year. Other listings are available all year round (except for when it is already booked). Entire homes or apartments highly available and rented frequently year-round to tourists probably don’t have the owner present, are illegal, and more importantly, are displacing New Yorkers.

Hint: The variable availability_365 (what part of the year the property is available to be rented) is a possible choice for categorizing rentals.

a) Choose a combination of both maps and non-mapping visualizations (graphs or tables) to explore where in NYC listings are available sporadically vs. year-round. Make sure to highlight the neighborhoods where most listings appear to be permanent or semi-permanent rentals.

- Graphic distribution of locations of year-round listings.

# Classify every listing by the number of days per year it is offered:
#   >= 270 days -> "Permanent"
#   180-269     -> "Semi-Permanent"
#   < 180       -> "Sporadically"
# Columns are referenced directly inside mutate() rather than via
# `airbnbnyc$`, so the rule always applies to the rows being piped in.
airbnbnyc_case <- airbnbnyc %>%
  mutate(case = case_when(
    availability_365 >= 270 ~ "Permanent",
    availability_365 >= 180 ~ "Semi-Permanent",
    availability_365 < 180 ~ "Sporadically"
  ))

# Count listings per neighbourhood and case, and attach the neighbourhood
# total to every row so the table can be sorted by neighbourhood size.
permanent_count <- airbnbnyc_case %>%
  group_by(neighbourhood_cleansed, case) %>%
  # Explicitly keep the grouping by neighbourhood so the sum() below is
  # computed per neighbourhood.
  summarize(nbh_number = n(), .groups = "drop_last") %>%
  mutate(group_sum = sum(nbh_number)) %>%
  ungroup() %>%
  arrange(desc(group_sum)) %>%
  rename(neighborhood = neighbourhood_cleansed,
         neighbor_case_total = nbh_number,
         neighbor_total = group_sum)
## `summarise()` has grouped output by 'neighbourhood_cleansed'. You can override using the `.groups` argument.
# preview the first rows; the table is sorted by neighbourhood total, largest first
head(permanent_count)
## # A tibble: 6 x 4
## # Groups:   neighborhood [2]
##   neighborhood       case           neighbor_case_total neighbor_total
##   <chr>              <chr>                        <int>          <int>
## 1 Williamsburg       Permanent                      439           3944
## 2 Williamsburg       Semi-Permanent                 280           3944
## 3 Williamsburg       Sporadically                  3225           3944
## 4 Bedford-Stuyvesant Permanent                      732           3708
## 5 Bedford-Stuyvesant Semi-Permanent                 411           3708
## 6 Bedford-Stuyvesant Sporadically                  2565           3708
# create a table for presentation
datatable(permanent_count,
          rownames = FALSE,
          colnames = c("Neighborhood", "Case", "Case Number in Neighborhood", "Total Number in Neighborhood"),
          caption = htmltools::tags$caption("Listing Availability: Sporadically or Year-Round"),
          #options=list(autoWidth = TRUE, dom = "ft", pageLength=10),
          filter = list(position = "top"))

# Keep every case row of the 10 neighbourhoods with the most listings.
# Selecting by neighbourhood name (instead of taking a fixed number of rows)
# still works when a neighbourhood lacks one of the cases or has an NA case.
permanent_count_top10 <- permanent_count %>%
  ungroup() %>%
  arrange(desc(neighbor_total)) %>%
  filter(neighborhood %in% head(unique(neighborhood), 10))

# Stacked bar chart: one bar per neighbourhood, one segment per case.
# The segment height is the per-case count, so the full bar adds up to the
# neighbourhood total; bars are ordered by that total.
ggplot(data = permanent_count_top10,
       aes(x = reorder(neighborhood, neighbor_total),
           y = neighbor_case_total,
           fill = case)) +
  geom_bar(stat = "identity") +
  coord_flip() +
  # labs() names the aesthetics, not the screen axes: x is the neighbourhood
  # even though coord_flip() draws it vertically.
  labs(x = 'Neighborhood', y = "Number of Listing", title = 'Listing Availability: Sporadically or Year-Round') +
  # The complete theme goes first; tweaks placed before it would be discarded.
  theme_tufte(base_size = 13) +
  theme(plot.title = element_text(hjust = 0.5)) +
  scale_fill_manual(values = c('#ece2f0', '#a6bddb', '#1c9099'))

- Mapping for locations of year-round listings.

# Density estimate of listing locations on the base map, drawn in a separate
# panel for each availability case
g <- ggmap(map_nyc) +
  stat_density2d(
    mapping = aes(x = longitude, y = latitude, fill = ..level.., alpha = ..level..),
    data = airbnbnyc_case,
    geom = "polygon"
  ) +
  facet_wrap(~case) +
  scale_fill_distiller(palette = 4, direction = 1) +
  theme_map() +
  theme(legend.position = "bottom")

g
## Warning: Removed 10503 rows containing non-finite values (stat_density2d).

# Colour palette keyed on the availability case
pal <- colorFactor("Set2", domain = airbnbnyc_case$case)
color_case <- pal(airbnbnyc_case$case)

# Interactive map of all listings coloured by case. The map is built from
# airbnbnyc_case itself and the colours/legend are computed with formulas, so
# every circle takes its colour from its own row instead of depending on two
# separate data frames staying in the same row order.
leaflet(airbnbnyc_case) %>%
  addProviderTiles("Stamen.TonerLite") %>%
  addCircles(color = ~pal(case)) %>%
  addLegend(pal = pal, values = ~case, title = "Case")
## Assuming "longitude" and "latitude" are longitude and latitude, respectively

b) Some hosts (identified by host_id) operate multiple rentals. Provide a data table of the top hosts, including the following:

  • total number of listings they are associated with
  • the average nightly price
  • the estimated average monthly total income from these listings
# 1) count the listings of each host, largest portfolios first
host_listing_number <- airbnbnyc %>%
  group_by(host_id) %>%
  summarize(number = n()) %>%
  arrange(desc(number))

# Convert the price column from text such as "$149.00" to numeric.
# Both the dollar sign and any thousands separator are removed; stripping only
# "$" would leave values like "$1,200.00" unparseable and turn them into NA.
# NOTE(review): presumably the raw file formats large prices with commas --
# confirm against the CSV.
airbnbnyc$price <- as.numeric(gsub("[$,]", "", airbnbnyc$price))
## Warning: NAs introduced by coercion
# 2) calculate the average nightly price of each host's listings
host_nightly_avg <- airbnbnyc %>%
  group_by(host_id) %>%
  summarize(nightly_avg = round(mean(price), 2)) %>%
  arrange(desc(nightly_avg))

# Build one row per host with listing count, average nightly price and an
# estimated average monthly income.
# 3) Income estimate per listing row:
#      number of listings * average nightly price * availability_365 / 12
#    i.e. only the days the listing is offered are counted. The per-row
#    estimates are averaged within each host and rounded once, to 2 decimals.
host_info <- airbnbnyc %>%
  left_join(host_listing_number, by = "host_id") %>%
  left_join(host_nightly_avg, by = "host_id") %>%
  mutate(listing_income = number * nightly_avg * availability_365 / 12) %>%
  # number and nightly_avg are constant within a host, so grouping by all
  # three simply carries them through to the result.
  group_by(host_id, number, nightly_avg) %>%
  summarize(monthly_income = round(mean(listing_income), 2),
            .groups = "drop") %>%
  arrange(desc(monthly_income))

# Interactive table of the per-host summary: one row per host with listing
# count, average nightly price and estimated monthly income; each column gets
# a filter box at the top
datatable(host_info,
          rownames = FALSE,
          colnames = c("Host Id", "Listing Count", "Nightly Avg Price", "Monthly Avg Income"),
          caption = htmltools::tags$caption("Fun Facts about the Airbnb Hosts"),
          #options=list(autoWidth = TRUE, dom = "ft", pageLength=10),
          filter = list(position = "top"))

3. Top Reviewed Rentals

Provide an interactive map which shows the Top 100 most expensive and Top 100 best reviewed rentals in NYC.

The map should differentiate these two groups and upon clicking on a point on the map should show some basic information (at least 3 pieces of information) in a tool tip.

# Rank every listing twice:
#   price_rank  - 1 = most expensive nightly price
#   review_rank - 1 = best review score
# Many listings share the same review score, so ties are broken by the number
# of reviews (more reviews first). Without a tie-breaker the order left over
# from the price sort decides, and the "best reviewed" listings end up being
# just the most expensive ones among those with the top score.
airbnbnyc_rank <- airbnbnyc %>%
  arrange(desc(price)) %>%
  mutate(price_rank = row_number()) %>%
  arrange(desc(review_scores_rating), desc(number_of_reviews)) %>%
  mutate(review_rank = row_number())

head(airbnbnyc_rank)
##         id   host_id host_listings_count latitude longitude       room_type
## 1  6969473  30247261                   1 40.72039 -73.99683 Entire home/apt
## 2 12040331  64454893                   2 40.73426 -73.99476 Entire home/apt
## 3 14590476  90256030                   1 40.74775 -73.99167 Entire home/apt
## 4 27047594    319077                   5 40.68550 -73.96112 Entire home/apt
## 5 32235802 241889662                   4 40.73742 -74.00484    Private room
## 6 32327326 241889662                   4 40.73697 -74.00343    Private room
##   accommodates bathrooms bedrooms price neighbourhood_cleansed
## 1            8       3.0        3   999           Little Italy
## 2            4       2.5        2   999      Greenwich Village
## 3            7       2.5        2   999                Chelsea
## 4            6       1.0        0   999           Clinton Hill
## 5            2       1.0        0   999           West Village
## 6            2       1.0        0   999           West Village
##   neighbourhood_group_cleansed availability_365 number_of_reviews
## 1                    Manhattan                0                 1
## 2                    Manhattan              166                11
## 3                    Manhattan                0                 1
## 4                     Brooklyn              363                 2
## 5                    Manhattan              329                 1
## 6                    Manhattan              332                 2
##   review_scores_rating
## 1                  100
## 2                  100
## 3                  100
## 4                  100
## 5                  100
## 6                  100
##                                                                                                                                                                                                                                                                                                    transit
## 1                                                                                                                                                                                                                                                almost every subway line at walking distance of the home.
## 2                                                                                                                                                                                                                                                                                     Many subways around.
## 3                                                                                      2 min walk to the 1, 2, N, Q, & R Trains. 5 min walk to the B,D,F,M Only 5 blocks from The Empire State Building and Herald Square!! Walking distance to Madison Square Garden, Bryant Park, & Madison Square Park.
## 4                                                                                                                                    Conveniently located next to the R train, 12-minute walk to the F and G lines and Atlantic Terminal hub (2, 3, 4, 5 , B, D, N, Q , R and W, and Long Island Railroad)
## 5 We recommend a yellow taxi in from airports or car service (call ahead) for in and out to airports. The closest Subway Station is at Eighth Avenue and 14th Street and the blue line subways A, C, and E trains. There is on-street parking available, however, we recommend two nearby Parking Garages.
## 6                                                                                                                                                                                                                                                                                                         
##   price_rank review_rank
## 1          2           1
## 2          5           2
## 3          7           3
## 4          9           4
## 5         12           5
## 6         14           6
# Interactive map with two toggleable overlays: the 100 most expensive and the
# 100 best reviewed listings. Clicking a circle shows id, price and capacity.
leaflet(airbnbnyc_rank) %>%
  # prepare the base layers; each group name must match an entry of
  # baseGroups in addLayersControl() below
  addTiles(group = "OpenStreetMap") %>%
  addProviderTiles(providers$Stamen.TonerLite, group = "Toner Lite") %>%
  # add first layer
  # The popup is a formula so that it is evaluated on this layer's own subset;
  # text built from the full data frame would not line up with the 100 circles.
  addCircles(group = "Top 100 Most Expensive",
      data = subset(airbnbnyc_rank, price_rank < 101),
      opacity = 1.0, stroke = TRUE,
      color = "#af8dc3", weight = 1,
      popup = ~paste("AirBnb Id:", id, "<br/>",
        "Nightly Price:", price, "$", "<br/>",
        "Accommodates:", accommodates, "People")) %>%
  # add second layer
  addCircles(group = "Top 100 Best Reviews",
      data = subset(airbnbnyc_rank, review_rank < 101),
      opacity = 1.0, stroke = TRUE,
      color = "#7fbf7b", weight = 1,
      popup = ~paste("AirBnb Id:", id, "<br/>",
        "Nightly Price:", price, "$", "<br/>",
        "Accommodates:", accommodates, "People")) %>%
  # add layer control
  addLayersControl(
    baseGroups = c("OpenStreetMap", "Toner Lite"),
    overlayGroups = c("Top 100 Most Expensive", "Top 100 Best Reviews"),
    options = layersControlOptions(collapsed = TRUE))
## Assuming "longitude" and "latitude" are longitude and latitude, respectively
## Assuming "longitude" and "latitude" are longitude and latitude, respectively